import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Source workbook and the worksheet to analyse.
file_path = "C:/Users/agusv/Desktop/Estudio/Tesis/Csv/Excel_Files/Cases_into_Budget.xlsx"
sheet_name = "Base Case into Budget"

data = pd.read_excel(file_path, sheet_name=sheet_name)

# describe() returns one column per variable; transpose so that every
# variable becomes a row and every summary measure a column.
summary = data.describe(percentiles=[0.25, 0.5, 0.75]).T

# Keep only the reported measures and give them their presentation headers.
statistics = summary[['mean', 'std', '25%', '50%', '75%']].rename(
    columns={'mean': 'Mean', 'std': 'Std. Dev.'}
)

# The variable names only live in the index; mirror them into a leading
# column so they are still present when the table is exported without it.
statistics.insert(0, 'Variable', statistics.index)

# Destination workbook for the descriptive-statistics table.
output_path = "C:/Users/agusv/Desktop/Estudio/Tesis/Csv/Excel_Files/descriptive_statistics.xlsx"

# Append mode keeps the other sheets of an existing workbook and replaces only
# the target sheet, but it requires the file to exist already: opening a
# missing workbook with mode='a' raises FileNotFoundError. On a first run,
# fall back to creating the workbook from scratch.
try:
    writer = pd.ExcelWriter(output_path, engine='openpyxl', mode='a', if_sheet_exists='replace')
except FileNotFoundError:
    # `if_sheet_exists` is only valid in append mode, so it is omitted here.
    writer = pd.ExcelWriter(output_path, engine='openpyxl', mode='w')

# The context manager saves and closes the workbook on exit.
with writer:
    statistics.to_excel(writer, index=False, sheet_name='Base Case A.1-B.1', float_format="%.2f")

# Every column except the first one gets its own histogram, each drawn in a
# distinct colour sampled evenly from the tab20 colormap.
plot_columns = data.columns[1:]
colors = plt.cm.tab20(np.linspace(0, 1, len(plot_columns)))

for column, bar_color in zip(plot_columns, colors):
    values = data[column].dropna()

    # About one bin per five observations, clamped to the range 5..10.
    n_bins = len(values) // 5
    n_bins = min(10, n_bins)
    n_bins = max(5, n_bins)

    fig, ax = plt.subplots(figsize=(10, 6))
    ax.hist(values, bins=n_bins, color=bar_color, edgecolor='black', alpha=0.7, density=False)
    ax.set_title(f"Histogram of {column}")
    ax.set_xlabel(column)
    ax.set_ylabel("Frequency")
    plt.show()

print(f"Descriptive statistics saved to {output_path}")
